import pandas as pd
import numpy as np
import os
import seaborn as sns
import matplotlib.pyplot as plt

# NOTE(review): '%matplotlib inline' is IPython/Jupyter magic, not valid Python
# syntax — it breaks this file when run outside a notebook. Kept as a comment;
# re-enable it in a notebook, or call plt.show() after each plot in a script.
# %matplotlib inline

# Hard-coded absolute Windows path — adjust for your environment before running.
DATA_PATH = r'C:\Users\Sandeep Immadi\Downloads\Education+-+Post+12th+Standard.csv'

# Load the college dataset (777 institutions, 18 columns incl. 'Names').
df = pd.read_csv(DATA_PATH)
df.head()
| Names | Apps | Accept | Enroll | Top10perc | Top25perc | F.Undergrad | P.Undergrad | Outstate | Room.Board | Books | Personal | PhD | Terminal | S.F.Ratio | perc.alumni | Expend | Grad.Rate | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Abilene Christian University | 1660 | 1232 | 721 | 23 | 52 | 2885 | 537 | 7440 | 3300 | 450 | 2200 | 70 | 78 | 18.1 | 12 | 7041 | 60 |
| 1 | Adelphi University | 2186 | 1924 | 512 | 16 | 29 | 2683 | 1227 | 12280 | 6450 | 750 | 1500 | 29 | 30 | 12.2 | 16 | 10527 | 56 |
| 2 | Adrian College | 1428 | 1097 | 336 | 22 | 50 | 1036 | 99 | 11250 | 3750 | 400 | 1165 | 53 | 66 | 12.9 | 30 | 8735 | 54 |
| 3 | Agnes Scott College | 417 | 349 | 137 | 60 | 89 | 510 | 63 | 12960 | 5450 | 450 | 875 | 92 | 97 | 7.7 | 37 | 19016 | 59 |
| 4 | Alaska Pacific University | 193 | 146 | 55 | 16 | 44 | 249 | 869 | 7560 | 4120 | 800 | 1500 | 76 | 72 | 11.9 | 2 | 10922 | 15 |
# Summary statistics per numeric column, transposed so features are rows.
df.describe().transpose()
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| Apps | 777.0 | 3001.638353 | 3870.201484 | 81.0 | 776.0 | 1558.0 | 3624.0 | 48094.0 |
| Accept | 777.0 | 2018.804376 | 2451.113971 | 72.0 | 604.0 | 1110.0 | 2424.0 | 26330.0 |
| Enroll | 777.0 | 779.972973 | 929.176190 | 35.0 | 242.0 | 434.0 | 902.0 | 6392.0 |
| Top10perc | 777.0 | 27.558559 | 17.640364 | 1.0 | 15.0 | 23.0 | 35.0 | 96.0 |
| Top25perc | 777.0 | 55.796654 | 19.804778 | 9.0 | 41.0 | 54.0 | 69.0 | 100.0 |
| F.Undergrad | 777.0 | 3699.907336 | 4850.420531 | 139.0 | 992.0 | 1707.0 | 4005.0 | 31643.0 |
| P.Undergrad | 777.0 | 855.298584 | 1522.431887 | 1.0 | 95.0 | 353.0 | 967.0 | 21836.0 |
| Outstate | 777.0 | 10440.669241 | 4023.016484 | 2340.0 | 7320.0 | 9990.0 | 12925.0 | 21700.0 |
| Room.Board | 777.0 | 4357.526384 | 1096.696416 | 1780.0 | 3597.0 | 4200.0 | 5050.0 | 8124.0 |
| Books | 777.0 | 549.380952 | 165.105360 | 96.0 | 470.0 | 500.0 | 600.0 | 2340.0 |
| Personal | 777.0 | 1340.642214 | 677.071454 | 250.0 | 850.0 | 1200.0 | 1700.0 | 6800.0 |
| PhD | 777.0 | 72.660232 | 16.328155 | 8.0 | 62.0 | 75.0 | 85.0 | 103.0 |
| Terminal | 777.0 | 79.702703 | 14.722359 | 24.0 | 71.0 | 82.0 | 92.0 | 100.0 |
| S.F.Ratio | 777.0 | 14.089704 | 3.958349 | 2.5 | 11.5 | 13.6 | 16.5 | 39.8 |
| perc.alumni | 777.0 | 22.743887 | 12.391801 | 0.0 | 13.0 | 21.0 | 31.0 | 64.0 |
| Expend | 777.0 | 9660.171171 | 5221.768440 | 3186.0 | 6751.0 | 8377.0 | 10830.0 | 56233.0 |
| Grad.Rate | 777.0 | 65.463320 | 17.177710 | 10.0 | 53.0 | 65.0 | 78.0 | 118.0 |
# Column dtypes, non-null counts, and memory usage of the raw frame.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 777 entries, 0 to 776 Data columns (total 18 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Names 777 non-null object 1 Apps 777 non-null int64 2 Accept 777 non-null int64 3 Enroll 777 non-null int64 4 Top10perc 777 non-null int64 5 Top25perc 777 non-null int64 6 F.Undergrad 777 non-null int64 7 P.Undergrad 777 non-null int64 8 Outstate 777 non-null int64 9 Room.Board 777 non-null int64 10 Books 777 non-null int64 11 Personal 777 non-null int64 12 PhD 777 non-null int64 13 Terminal 777 non-null int64 14 S.F.Ratio 777 non-null float64 15 perc.alumni 777 non-null int64 16 Expend 777 non-null int64 17 Grad.Rate 777 non-null int64 dtypes: float64(1), int64(16), object(1) memory usage: 109.4+ KB
# Per-column count of missing values (isna is the canonical alias of isnull).
df.isna().sum()
Names 0 Apps 0 Accept 0 Enroll 0 Top10perc 0 Top25perc 0 F.Undergrad 0 P.Undergrad 0 Outstate 0 Room.Board 0 Books 0 Personal 0 PhD 0 Terminal 0 S.F.Ratio 0 perc.alumni 0 Expend 0 Grad.Rate 0 dtype: int64
# Flag rows that are exact duplicates of an earlier row, then report the count.
duplicate_mask = df.duplicated()
print(f'Number of duplicate rows = {int(duplicate_mask.sum())}')
Number of duplicate rows = 0
# Drop the non-numeric college-name column before numeric analysis.
data_new = df.drop(columns=['Names'])
data_new.head()
| Apps | Accept | Enroll | Top10perc | Top25perc | F.Undergrad | P.Undergrad | Outstate | Room.Board | Books | Personal | PhD | Terminal | S.F.Ratio | perc.alumni | Expend | Grad.Rate | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1660 | 1232 | 721 | 23 | 52 | 2885 | 537 | 7440 | 3300 | 450 | 2200 | 70 | 78 | 18.1 | 12 | 7041 | 60 |
| 1 | 2186 | 1924 | 512 | 16 | 29 | 2683 | 1227 | 12280 | 6450 | 750 | 1500 | 29 | 30 | 12.2 | 16 | 10527 | 56 |
| 2 | 1428 | 1097 | 336 | 22 | 50 | 1036 | 99 | 11250 | 3750 | 400 | 1165 | 53 | 66 | 12.9 | 30 | 8735 | 54 |
| 3 | 417 | 349 | 137 | 60 | 89 | 510 | 63 | 12960 | 5450 | 450 | 875 | 92 | 97 | 7.7 | 37 | 19016 | 59 |
| 4 | 193 | 146 | 55 | 16 | 44 | 249 | 869 | 7560 | 4120 | 800 | 1500 | 76 | 72 | 11.9 | 2 | 10922 | 15 |
# (rows, columns) after dropping 'Names' — expect (777, 17).
data_new.shape
(777, 17)
# List the feature names retained for analysis.
columns_names = data_new.columns.tolist()
# Fixed typo in the printed label ("Columns names" -> "Column names").
print("Column names:")
print(columns_names)
Columns names: ['Apps', 'Accept', 'Enroll', 'Top10perc', 'Top25perc', 'F.Undergrad', 'P.Undergrad', 'Outstate', 'Room.Board', 'Books', 'Personal', 'PhD', 'Terminal', 'S.F.Ratio', 'perc.alumni', 'Expend', 'Grad.Rate']
# Per-feature skewness; large positive values (Apps, P.Undergrad, Books, Expend)
# indicate heavy right tails.
data_new.skew()
Apps 3.723750 Accept 3.417727 Enroll 2.690465 Top10perc 1.413217 Top25perc 0.259340 F.Undergrad 2.610458 P.Undergrad 5.692353 Outstate 0.509278 Room.Board 0.477356 Books 3.485025 Personal 1.742497 PhD -0.768170 Terminal -0.816542 S.F.Ratio 0.667435 perc.alumni 0.606891 Expend 3.459322 Grad.Rate -0.113777 dtype: float64
# Per-feature excess kurtosis (kurt is an alias of kurtosis).
data_new.kurtosis()
Apps 26.774253 Accept 18.938099 Enroll 8.831544 Top10perc 2.208065 Top25perc -0.564121 F.Undergrad 7.696586 P.Undergrad 55.034518 Outstate -0.413832 Room.Board -0.187553 Books 28.333097 Personal 7.124017 PhD 0.564773 Terminal 0.242019 S.F.Ratio 2.561209 perc.alumni -0.096807 Expend 18.771500 Grad.Rate -0.205226 dtype: float64
# One histogram per numeric feature, laid out on a shared figure.
data_new.hist(figsize=(20,30))
array([[<AxesSubplot:title={'center':'Apps'}>,
<AxesSubplot:title={'center':'Accept'}>,
<AxesSubplot:title={'center':'Enroll'}>,
<AxesSubplot:title={'center':'Top10perc'}>],
[<AxesSubplot:title={'center':'Top25perc'}>,
<AxesSubplot:title={'center':'F.Undergrad'}>,
<AxesSubplot:title={'center':'P.Undergrad'}>,
<AxesSubplot:title={'center':'Outstate'}>],
[<AxesSubplot:title={'center':'Room.Board'}>,
<AxesSubplot:title={'center':'Books'}>,
<AxesSubplot:title={'center':'Personal'}>,
<AxesSubplot:title={'center':'PhD'}>],
[<AxesSubplot:title={'center':'Terminal'}>,
<AxesSubplot:title={'center':'S.F.Ratio'}>,
<AxesSubplot:title={'center':'perc.alumni'}>,
<AxesSubplot:title={'center':'Expend'}>],
[<AxesSubplot:title={'center':'Grad.Rate'}>, <AxesSubplot:>,
<AxesSubplot:>, <AxesSubplot:>]], dtype=object)
# Boxplots of all features on one axis — raw scales differ by orders of
# magnitude, which motivates the standardization done below.
plt.figure(figsize = (20,8))
data_new.boxplot()
<AxesSubplot:>
plt.figure(figsize=(15,10))
# numeric_only=True: 'Names' is a string column, and since pandas 2.0
# DataFrame.corr raises TypeError on non-numeric columns instead of silently
# dropping them. (Requires pandas >= 1.5 for the keyword.)
sns.heatmap(df.corr(numeric_only=True),annot=True,fmt=".2f");
df.corr(numeric_only=True)
| Apps | Accept | Enroll | Top10perc | Top25perc | F.Undergrad | P.Undergrad | Outstate | Room.Board | Books | Personal | PhD | Terminal | S.F.Ratio | perc.alumni | Expend | Grad.Rate | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Apps | 1.000000 | 0.943451 | 0.846822 | 0.338834 | 0.351640 | 0.814491 | 0.398264 | 0.050159 | 0.164939 | 0.132559 | 0.178731 | 0.390697 | 0.369491 | 0.095633 | -0.090226 | 0.259592 | 0.146755 |
| Accept | 0.943451 | 1.000000 | 0.911637 | 0.192447 | 0.247476 | 0.874223 | 0.441271 | -0.025755 | 0.090899 | 0.113525 | 0.200989 | 0.355758 | 0.337583 | 0.176229 | -0.159990 | 0.124717 | 0.067313 |
| Enroll | 0.846822 | 0.911637 | 1.000000 | 0.181294 | 0.226745 | 0.964640 | 0.513069 | -0.155477 | -0.040232 | 0.112711 | 0.280929 | 0.331469 | 0.308274 | 0.237271 | -0.180794 | 0.064169 | -0.022341 |
| Top10perc | 0.338834 | 0.192447 | 0.181294 | 1.000000 | 0.891995 | 0.141289 | -0.105356 | 0.562331 | 0.371480 | 0.118858 | -0.093316 | 0.531828 | 0.491135 | -0.384875 | 0.455485 | 0.660913 | 0.494989 |
| Top25perc | 0.351640 | 0.247476 | 0.226745 | 0.891995 | 1.000000 | 0.199445 | -0.053577 | 0.489394 | 0.331490 | 0.115527 | -0.080810 | 0.545862 | 0.524749 | -0.294629 | 0.417864 | 0.527447 | 0.477281 |
| F.Undergrad | 0.814491 | 0.874223 | 0.964640 | 0.141289 | 0.199445 | 1.000000 | 0.570512 | -0.215742 | -0.068890 | 0.115550 | 0.317200 | 0.318337 | 0.300019 | 0.279703 | -0.229462 | 0.018652 | -0.078773 |
| P.Undergrad | 0.398264 | 0.441271 | 0.513069 | -0.105356 | -0.053577 | 0.570512 | 1.000000 | -0.253512 | -0.061326 | 0.081200 | 0.319882 | 0.149114 | 0.141904 | 0.232531 | -0.280792 | -0.083568 | -0.257001 |
| Outstate | 0.050159 | -0.025755 | -0.155477 | 0.562331 | 0.489394 | -0.215742 | -0.253512 | 1.000000 | 0.654256 | 0.038855 | -0.299087 | 0.382982 | 0.407983 | -0.554821 | 0.566262 | 0.672779 | 0.571290 |
| Room.Board | 0.164939 | 0.090899 | -0.040232 | 0.371480 | 0.331490 | -0.068890 | -0.061326 | 0.654256 | 1.000000 | 0.127963 | -0.199428 | 0.329202 | 0.374540 | -0.362628 | 0.272363 | 0.501739 | 0.424942 |
| Books | 0.132559 | 0.113525 | 0.112711 | 0.118858 | 0.115527 | 0.115550 | 0.081200 | 0.038855 | 0.127963 | 1.000000 | 0.179295 | 0.026906 | 0.099955 | -0.031929 | -0.040208 | 0.112409 | 0.001061 |
| Personal | 0.178731 | 0.200989 | 0.280929 | -0.093316 | -0.080810 | 0.317200 | 0.319882 | -0.299087 | -0.199428 | 0.179295 | 1.000000 | -0.010936 | -0.030613 | 0.136345 | -0.285968 | -0.097892 | -0.269344 |
| PhD | 0.390697 | 0.355758 | 0.331469 | 0.531828 | 0.545862 | 0.318337 | 0.149114 | 0.382982 | 0.329202 | 0.026906 | -0.010936 | 1.000000 | 0.849587 | -0.130530 | 0.249009 | 0.432762 | 0.305038 |
| Terminal | 0.369491 | 0.337583 | 0.308274 | 0.491135 | 0.524749 | 0.300019 | 0.141904 | 0.407983 | 0.374540 | 0.099955 | -0.030613 | 0.849587 | 1.000000 | -0.160104 | 0.267130 | 0.438799 | 0.289527 |
| S.F.Ratio | 0.095633 | 0.176229 | 0.237271 | -0.384875 | -0.294629 | 0.279703 | 0.232531 | -0.554821 | -0.362628 | -0.031929 | 0.136345 | -0.130530 | -0.160104 | 1.000000 | -0.402929 | -0.583832 | -0.306710 |
| perc.alumni | -0.090226 | -0.159990 | -0.180794 | 0.455485 | 0.417864 | -0.229462 | -0.280792 | 0.566262 | 0.272363 | -0.040208 | -0.285968 | 0.249009 | 0.267130 | -0.402929 | 1.000000 | 0.417712 | 0.490898 |
| Expend | 0.259592 | 0.124717 | 0.064169 | 0.660913 | 0.527447 | 0.018652 | -0.083568 | 0.672779 | 0.501739 | 0.112409 | -0.097892 | 0.432762 | 0.438799 | -0.583832 | 0.417712 | 1.000000 | 0.390343 |
| Grad.Rate | 0.146755 | 0.067313 | -0.022341 | 0.494989 | 0.477281 | -0.078773 | -0.257001 | 0.571290 | 0.424942 | 0.001061 | -0.269344 | 0.305038 | 0.289527 | -0.306710 | 0.490898 | 0.390343 | 1.000000 |
# Pairwise scatter plots with KDE curves on the diagonal; non-numeric columns
# are ignored by seaborn.
sns.pairplot(df,diag_kind='kde')
plt.show()
from sklearn.preprocessing import StandardScaler

# Standardize every feature to zero mean / unit variance (z-scores),
# so PCA is not dominated by large-scale columns like Apps or Outstate.
scaler = StandardScaler()
X_std = scaler.fit_transform(data_new)
X_std
array([[-3.46881819e-01, -3.21205453e-01, -6.35089011e-02, ...,
-8.67574189e-01, -5.01910084e-01, -3.18251941e-01],
[-2.10884040e-01, -3.87029908e-02, -2.88584214e-01, ...,
-5.44572203e-01, 1.66109850e-01, -5.51261842e-01],
[-4.06865631e-01, -3.76317928e-01, -4.78121319e-01, ...,
5.85934748e-01, -1.77289956e-01, -6.67766793e-01],
...,
[-2.33895071e-01, -4.23771558e-02, -9.15087008e-02, ...,
-2.21570217e-01, -2.56241250e-01, -9.59029170e-01],
[ 1.99171118e+00, 1.77256262e-01, 5.78332661e-01, ...,
2.12019418e+00, 5.88797079e+00, 1.95359460e+00],
[-3.26765760e-03, -6.68715889e-02, -9.58163623e-02, ...,
4.24433755e-01, -9.87115613e-01, 1.95359460e+00]])
# Wrap the standardized array back into a labelled DataFrame.
df_s = pd.DataFrame(data=X_std, columns=data_new.columns.tolist())
df_s.head()
| Apps | Accept | Enroll | Top10perc | Top25perc | F.Undergrad | P.Undergrad | Outstate | Room.Board | Books | Personal | PhD | Terminal | S.F.Ratio | perc.alumni | Expend | Grad.Rate | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | -0.346882 | -0.321205 | -0.063509 | -0.258583 | -0.191827 | -0.168116 | -0.209207 | -0.746356 | -0.964905 | -0.602312 | 1.270045 | -0.163028 | -0.115729 | 1.013776 | -0.867574 | -0.501910 | -0.318252 |
| 1 | -0.210884 | -0.038703 | -0.288584 | -0.655656 | -1.353911 | -0.209788 | 0.244307 | 0.457496 | 1.909208 | 1.215880 | 0.235515 | -2.675646 | -3.378176 | -0.477704 | -0.544572 | 0.166110 | -0.551262 |
| 2 | -0.406866 | -0.376318 | -0.478121 | -0.315307 | -0.292878 | -0.549565 | -0.497090 | 0.201305 | -0.554317 | -0.905344 | -0.259582 | -1.204845 | -0.931341 | -0.300749 | 0.585935 | -0.177290 | -0.667767 |
| 3 | -0.668261 | -0.681682 | -0.692427 | 1.840231 | 1.677612 | -0.658079 | -0.520752 | 0.626633 | 0.996791 | -0.602312 | -0.688173 | 1.185206 | 1.175657 | -1.615274 | 1.151188 | 1.792851 | -0.376504 |
| 4 | -0.726176 | -0.764555 | -0.780735 | -0.655656 | -0.596031 | -0.711924 | 0.009005 | -0.716508 | -0.216723 | 1.518912 | 0.235515 | 0.204672 | -0.523535 | -0.553542 | -1.675079 | 0.241803 | -2.939613 |
# Sanity check: after scaling, every column should have mean ~0 and std ~1
# (std shows 1.0006 because StandardScaler divides by n while describe uses n-1).
df_s.describe()
| Apps | Accept | Enroll | Top10perc | Top25perc | F.Undergrad | P.Undergrad | Outstate | Room.Board | Books | Personal | PhD | Terminal | S.F.Ratio | perc.alumni | Expend | Grad.Rate | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 7.770000e+02 | 7.770000e+02 | 7.770000e+02 | 7.770000e+02 | 7.770000e+02 | 7.770000e+02 | 7.770000e+02 | 7.770000e+02 | 7.770000e+02 | 7.770000e+02 | 7.770000e+02 | 7.770000e+02 | 7.770000e+02 | 7.770000e+02 | 7.770000e+02 | 7.770000e+02 | 7.770000e+02 |
| mean | 6.355797e-17 | 6.774575e-17 | -5.249269e-17 | -2.753232e-17 | -1.546739e-16 | -1.661405e-16 | -3.029180e-17 | 6.515595e-17 | 3.570717e-16 | -2.192583e-16 | 4.765243e-17 | 5.954768e-17 | -4.481615e-16 | -2.057556e-17 | -6.022638e-17 | 1.213101e-16 | 3.886495e-16 |
| std | 1.000644e+00 | 1.000644e+00 | 1.000644e+00 | 1.000644e+00 | 1.000644e+00 | 1.000644e+00 | 1.000644e+00 | 1.000644e+00 | 1.000644e+00 | 1.000644e+00 | 1.000644e+00 | 1.000644e+00 | 1.000644e+00 | 1.000644e+00 | 1.000644e+00 | 1.000644e+00 | 1.000644e+00 |
| min | -7.551337e-01 | -7.947645e-01 | -8.022728e-01 | -1.506526e+00 | -2.364419e+00 | -7.346169e-01 | -5.615022e-01 | -2.014878e+00 | -2.351778e+00 | -2.747779e+00 | -1.611860e+00 | -3.962596e+00 | -3.785982e+00 | -2.929799e+00 | -1.836580e+00 | -1.240641e+00 | -3.230876e+00 |
| 25% | -5.754408e-01 | -5.775805e-01 | -5.793514e-01 | -7.123803e-01 | -7.476067e-01 | -5.586426e-01 | -4.997191e-01 | -7.762035e-01 | -6.939170e-01 | -4.810994e-01 | -7.251203e-01 | -6.532948e-01 | -5.915023e-01 | -6.546598e-01 | -7.868237e-01 | -5.574826e-01 | -7.260193e-01 |
| 50% | -3.732540e-01 | -3.710108e-01 | -3.725836e-01 | -2.585828e-01 | -9.077663e-02 | -4.111378e-01 | -3.301442e-01 | -1.120949e-01 | -1.437297e-01 | -2.992802e-01 | -2.078552e-01 | 1.433889e-01 | 1.561419e-01 | -1.237939e-01 | -1.408197e-01 | -2.458933e-01 | -2.698956e-02 |
| 75% | 1.609122e-01 | 1.654173e-01 | 1.314128e-01 | 4.221134e-01 | 6.671042e-01 | 6.294077e-02 | 7.341765e-02 | 6.179271e-01 | 6.318245e-01 | 3.067838e-01 | 5.310950e-01 | 7.562224e-01 | 8.358184e-01 | 6.093067e-01 | 6.666852e-01 | 2.241735e-01 | 7.302926e-01 |
| max | 1.165867e+01 | 9.924816e+00 | 6.043678e+00 | 3.882319e+00 | 2.233391e+00 | 5.764674e+00 | 1.378992e+01 | 2.800531e+00 | 3.436593e+00 | 1.085230e+01 | 8.068387e+00 | 1.859323e+00 | 1.379560e+00 | 6.499390e+00 | 3.331452e+00 | 8.924721e+00 | 3.060392e+00 |
# Boxplots again, now on the standardized data — features share one scale,
# so outliers are directly comparable.
plt.figure(figsize = (20,8))
df_s.boxplot()
<AxesSubplot:>
# Single-feature boxplot of Top25perc from the ORIGINAL (unscaled) frame.
plt.figure(figsize=(5,5))
sns.boxplot(data=df['Top25perc']);
# Covariance matrix computed by hand: (1/(n-1)) * (X - mean)^T (X - mean).
# On standardized data the result is (up to the n/(n-1) factor) the
# correlation matrix, so the diagonal reads ~1.0013 rather than exactly 1.
col_means = X_std.mean(axis=0)
centered = X_std - col_means
cov_mat = centered.T.dot(centered) / (centered.shape[0] - 1)
print(f'Covariance matrix \n{cov_mat}')
Covariance matrix [[ 1.00128866 0.94466636 0.84791332 0.33927032 0.35209304 0.81554018 0.3987775 0.05022367 0.16515151 0.13272942 0.17896117 0.39120081 0.36996762 0.09575627 -0.09034216 0.2599265 0.14694372] [ 0.94466636 1.00128866 0.91281145 0.19269493 0.24779465 0.87534985 0.44183938 -0.02578774 0.09101577 0.11367165 0.20124767 0.35621633 0.3380184 0.17645611 -0.16019604 0.12487773 0.06739929] [ 0.84791332 0.91281145 1.00128866 0.18152715 0.2270373 0.96588274 0.51372977 -0.1556777 -0.04028353 0.11285614 0.28129148 0.33189629 0.30867133 0.23757707 -0.18102711 0.06425192 -0.02236983] [ 0.33927032 0.19269493 0.18152715 1.00128866 0.89314445 0.1414708 -0.10549205 0.5630552 0.37195909 0.1190116 -0.09343665 0.53251337 0.49176793 -0.38537048 0.45607223 0.6617651 0.49562711] [ 0.35209304 0.24779465 0.2270373 0.89314445 1.00128866 0.19970167 -0.05364569 0.49002449 0.33191707 0.115676 -0.08091441 0.54656564 0.52542506 -0.29500852 0.41840277 0.52812713 0.47789622] [ 0.81554018 0.87534985 0.96588274 0.1414708 0.19970167 1.00128866 0.57124738 -0.21602002 -0.06897917 0.11569867 0.31760831 0.3187472 0.30040557 0.28006379 -0.22975792 0.01867565 -0.07887464] [ 0.3987775 0.44183938 0.51372977 -0.10549205 -0.05364569 0.57124738 1.00128866 -0.25383901 -0.06140453 0.08130416 0.32029384 0.14930637 0.14208644 0.23283016 -0.28115421 -0.08367612 -0.25733218] [ 0.05022367 -0.02578774 -0.1556777 0.5630552 0.49002449 -0.21602002 -0.25383901 1.00128866 0.65509951 0.03890494 -0.29947232 0.38347594 0.40850895 -0.55553625 0.56699214 0.6736456 0.57202613] [ 0.16515151 0.09101577 -0.04028353 0.37195909 0.33191707 -0.06897917 -0.06140453 0.65509951 1.00128866 0.12812787 -0.19968518 0.32962651 0.3750222 -0.36309504 0.27271444 0.50238599 0.42548915] [ 0.13272942 0.11367165 0.11285614 0.1190116 0.115676 0.11569867 0.08130416 0.03890494 0.12812787 1.00128866 0.17952581 0.0269404 0.10008351 -0.03197042 -0.04025955 0.11255393 0.00106226] [ 0.17896117 0.20124767 0.28129148 -0.09343665 -0.08091441 
0.31760831 0.32029384 -0.29947232 -0.19968518 0.17952581 1.00128866 -0.01094989 -0.03065256 0.13652054 -0.2863366 -0.09801804 -0.26969106] [ 0.39120081 0.35621633 0.33189629 0.53251337 0.54656564 0.3187472 0.14930637 0.38347594 0.32962651 0.0269404 -0.01094989 1.00128866 0.85068186 -0.13069832 0.24932955 0.43331936 0.30543094] [ 0.36996762 0.3380184 0.30867133 0.49176793 0.52542506 0.30040557 0.14208644 0.40850895 0.3750222 0.10008351 -0.03065256 0.85068186 1.00128866 -0.16031027 0.26747453 0.43936469 0.28990033] [ 0.09575627 0.17645611 0.23757707 -0.38537048 -0.29500852 0.28006379 0.23283016 -0.55553625 -0.36309504 -0.03197042 0.13652054 -0.13069832 -0.16031027 1.00128866 -0.4034484 -0.5845844 -0.30710565] [-0.09034216 -0.16019604 -0.18102711 0.45607223 0.41840277 -0.22975792 -0.28115421 0.56699214 0.27271444 -0.04025955 -0.2863366 0.24932955 0.26747453 -0.4034484 1.00128866 0.41825001 0.49153016] [ 0.2599265 0.12487773 0.06425192 0.6617651 0.52812713 0.01867565 -0.08367612 0.6736456 0.50238599 0.11255393 -0.09801804 0.43331936 0.43936469 -0.5845844 0.41825001 1.00128866 0.39084571] [ 0.14694372 0.06739929 -0.02236983 0.49562711 0.47789622 -0.07887464 -0.25733218 0.57202613 0.42548915 0.00106226 -0.26969106 0.30543094 0.28990033 -0.30710565 0.49153016 0.39084571 1.00128866]]
# Recompute the covariance with NumPy; np.cov expects variables in rows,
# hence the transpose.
cov_mat = np.cov(X_std.T)
# Eigendecomposition of the covariance matrix: columns of eig_vecs are the
# principal directions, eig_vals the variance along each (NOT sorted).
eig_vals, eig_vecs = np.linalg.eig(cov_mat)
# len() of a square 2-D array is its row count, i.e. the number of features (17).
print('\nNumber of Eigenvectors : ', len(eig_vecs))
print('Eigenvectors \n%s' %eig_vecs)
print('\nEigenvalues \n%s' %eig_vals)
Number of Eigenvectors : 17 Eigenvectors [[-2.48765602e-01 3.31598227e-01 6.30921033e-02 -2.81310530e-01 5.74140964e-03 1.62374420e-02 4.24863486e-02 1.03090398e-01 9.02270802e-02 -5.25098025e-02 3.58970400e-01 -4.59139498e-01 4.30462074e-02 -1.33405806e-01 8.06328039e-02 -5.95830975e-01 2.40709086e-02] [-2.07601502e-01 3.72116750e-01 1.01249056e-01 -2.67817346e-01 5.57860920e-02 -7.53468452e-03 1.29497196e-02 5.62709623e-02 1.77864814e-01 -4.11400844e-02 -5.43427250e-01 5.18568789e-01 -5.84055850e-02 1.45497511e-01 3.34674281e-02 -2.92642398e-01 -1.45102446e-01] [-1.76303592e-01 4.03724252e-01 8.29855709e-02 -1.61826771e-01 -5.56936353e-02 4.25579803e-02 2.76928937e-02 -5.86623552e-02 1.28560713e-01 -3.44879147e-02 6.09651110e-01 4.04318439e-01 -6.93988831e-02 -2.95896092e-02 -8.56967180e-02 4.44638207e-01 1.11431545e-02] [-3.54273947e-01 -8.24118211e-02 -3.50555339e-02 5.15472524e-02 -3.95434345e-01 5.26927980e-02 1.61332069e-01 1.22678028e-01 -3.41099863e-01 -6.40257785e-02 -1.44986329e-01 1.48738723e-01 -8.10481404e-03 -6.97722522e-01 -1.07828189e-01 -1.02303616e-03 3.85543001e-02] [-3.44001279e-01 -4.47786551e-02 2.41479376e-02 1.09766541e-01 -4.26533594e-01 -3.30915896e-02 1.18485556e-01 1.02491967e-01 -4.03711989e-01 -1.45492289e-02 8.03478445e-02 -5.18683400e-02 -2.73128469e-01 6.17274818e-01 1.51742110e-01 -2.18838802e-02 -8.93515563e-02] [-1.54640962e-01 4.17673774e-01 6.13929764e-02 -1.00412335e-01 -4.34543659e-02 4.34542349e-02 2.50763629e-02 -7.88896442e-02 5.94419181e-02 -2.08471834e-02 -4.14705279e-01 -5.60363054e-01 -8.11578181e-02 -9.91640992e-03 -5.63728817e-02 5.23622267e-01 5.61767721e-02] [-2.64425045e-02 3.15087830e-01 -1.39681716e-01 1.58558487e-01 3.02385408e-01 1.91198583e-01 -6.10423460e-02 -5.70783816e-01 -5.60672902e-01 2.23105808e-01 9.01788964e-03 5.27313042e-02 1.00693324e-01 -2.09515982e-02 1.92857500e-02 -1.25997650e-01 -6.35360730e-02] [-2.94736419e-01 -2.49643522e-01 -4.65988731e-02 -1.31291364e-01 2.22532003e-01 3.00003910e-02 
-1.08528966e-01 -9.84599754e-03 4.57332880e-03 -1.86675363e-01 5.08995918e-02 -1.01594830e-01 1.43220673e-01 -3.83544794e-02 -3.40115407e-02 1.41856014e-01 -8.23443779e-01] [-2.49030449e-01 -1.37808883e-01 -1.48967389e-01 -1.84995991e-01 5.60919470e-01 -1.62755446e-01 -2.09744235e-01 2.21453442e-01 -2.75022548e-01 -2.98324237e-01 1.14639620e-03 2.59293381e-02 -3.59321731e-01 -3.40197083e-03 -5.84289756e-02 6.97485854e-02 3.54559731e-01] [-6.47575181e-02 5.63418434e-02 -6.77411649e-01 -8.70892205e-02 -1.27288825e-01 -6.41054950e-01 1.49692034e-01 -2.13293009e-01 1.33663353e-01 8.20292186e-02 7.72631963e-04 -2.88282896e-03 3.19400370e-02 9.43887925e-03 -6.68494643e-02 -1.14379958e-02 -2.81593679e-02] [ 4.25285386e-02 2.19929218e-01 -4.99721120e-01 2.30710568e-01 -2.22311021e-01 3.31398003e-01 -6.33790064e-01 2.32660840e-01 9.44688900e-02 -1.36027616e-01 -1.11433396e-03 1.28904022e-02 -1.85784733e-02 3.09001353e-03 2.75286207e-02 -3.94547417e-02 -3.92640266e-02] [-3.18312875e-01 5.83113174e-02 1.27028371e-01 5.34724832e-01 1.40166326e-01 -9.12555212e-02 1.09641298e-03 7.70400002e-02 1.85181525e-01 1.23452200e-01 1.38133366e-02 -2.98075465e-02 4.03723253e-02 1.12055599e-01 -6.91126145e-01 -1.27696382e-01 2.32224316e-02] [-3.17056016e-01 4.64294477e-02 6.60375454e-02 5.19443019e-01 2.04719730e-01 -1.54927646e-01 2.84770105e-02 1.21613297e-02 2.54938198e-01 8.85784627e-02 6.20932749e-03 2.70759809e-02 -5.89734026e-02 -1.58909651e-01 6.71008607e-01 5.83134662e-02 1.64850420e-02] [ 1.76957895e-01 2.46665277e-01 2.89848401e-01 1.61189487e-01 -7.93882496e-02 -4.87045875e-01 -2.19259358e-01 8.36048735e-02 -2.74544380e-01 -4.72045249e-01 -2.22215182e-03 2.12476294e-02 4.45000727e-01 2.08991284e-02 4.13740967e-02 1.77152700e-02 -1.10262122e-02] [-2.05082369e-01 -2.46595274e-01 1.46989274e-01 -1.73142230e-02 -2.16297411e-01 4.73400144e-02 -2.43321156e-01 -6.78523654e-01 2.55334907e-01 -4.22999706e-01 -1.91869743e-02 -3.33406243e-03 -1.30727978e-01 8.41789410e-03 -2.71542091e-02 
-1.04088088e-01 1.82660654e-01] [-3.18908750e-01 -1.31689865e-01 -2.26743985e-01 -7.92734946e-02 7.59581203e-02 2.98118619e-01 2.26584481e-01 5.41593771e-02 4.91388809e-02 -1.32286331e-01 -3.53098218e-02 4.38803230e-02 6.92088870e-01 2.27742017e-01 7.31225166e-02 9.37464497e-02 3.25982295e-01] [-2.52315654e-01 -1.69240532e-01 2.08064649e-01 -2.69129066e-01 -1.09267913e-01 -2.16163313e-01 -5.59943937e-01 5.33553891e-03 -4.19043052e-02 5.90271067e-01 -1.30710024e-02 5.00844705e-03 2.19839000e-01 3.39433604e-03 3.64767385e-02 6.91969778e-02 1.22106697e-01]] Eigenvalues [5.45052162 4.48360686 1.17466761 1.00820573 0.93423123 0.84849117 0.6057878 0.58787222 0.53061262 0.4043029 0.02302787 0.03672545 0.31344588 0.08802464 0.1439785 0.16779415 0.22061096]
plt.figure(figsize=(15,10))
# NOTE(review): cov_mat is the covariance of the z-scored data, i.e. the
# correlation matrix scaled by n/(n-1) — so the "Correlation" title is only
# approximately accurate (diagonal is ~1.0013, capped visually by vmax=1).
sns.heatmap(cov_mat, vmax=1, square=True,annot=True,cmap='cubehelix')
plt.title('Correlation between different features')
Text(0.5, 1.0, 'Correlation between different features')
# Tabulate the eigenvectors; after transposing, row 'eigvec_k' holds the
# k-th eigenvector's loading on every feature.
eigenvec_df = pd.DataFrame(eig_vecs)
eigenvec_df.columns = [f'eigvec_{idx}' for idx in range(1, len(eig_vecs) + 1)]
eigenvec_df.T
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| eigvec_1 | -0.248766 | -0.207602 | -0.176304 | -0.354274 | -0.344001 | -0.154641 | -0.026443 | -0.294736 | -0.249030 | -0.064758 | 0.042529 | -0.318313 | -0.317056 | 0.176958 | -0.205082 | -0.318909 | -0.252316 |
| eigvec_2 | 0.331598 | 0.372117 | 0.403724 | -0.082412 | -0.044779 | 0.417674 | 0.315088 | -0.249644 | -0.137809 | 0.056342 | 0.219929 | 0.058311 | 0.046429 | 0.246665 | -0.246595 | -0.131690 | -0.169241 |
| eigvec_3 | 0.063092 | 0.101249 | 0.082986 | -0.035056 | 0.024148 | 0.061393 | -0.139682 | -0.046599 | -0.148967 | -0.677412 | -0.499721 | 0.127028 | 0.066038 | 0.289848 | 0.146989 | -0.226744 | 0.208065 |
| eigvec_4 | -0.281311 | -0.267817 | -0.161827 | 0.051547 | 0.109767 | -0.100412 | 0.158558 | -0.131291 | -0.184996 | -0.087089 | 0.230711 | 0.534725 | 0.519443 | 0.161189 | -0.017314 | -0.079273 | -0.269129 |
| eigvec_5 | 0.005741 | 0.055786 | -0.055694 | -0.395434 | -0.426534 | -0.043454 | 0.302385 | 0.222532 | 0.560919 | -0.127289 | -0.222311 | 0.140166 | 0.204720 | -0.079388 | -0.216297 | 0.075958 | -0.109268 |
| eigvec_6 | 0.016237 | -0.007535 | 0.042558 | 0.052693 | -0.033092 | 0.043454 | 0.191199 | 0.030000 | -0.162755 | -0.641055 | 0.331398 | -0.091256 | -0.154928 | -0.487046 | 0.047340 | 0.298119 | -0.216163 |
| eigvec_7 | 0.042486 | 0.012950 | 0.027693 | 0.161332 | 0.118486 | 0.025076 | -0.061042 | -0.108529 | -0.209744 | 0.149692 | -0.633790 | 0.001096 | 0.028477 | -0.219259 | -0.243321 | 0.226584 | -0.559944 |
| eigvec_8 | 0.103090 | 0.056271 | -0.058662 | 0.122678 | 0.102492 | -0.078890 | -0.570784 | -0.009846 | 0.221453 | -0.213293 | 0.232661 | 0.077040 | 0.012161 | 0.083605 | -0.678524 | 0.054159 | 0.005336 |
| eigvec_9 | 0.090227 | 0.177865 | 0.128561 | -0.341100 | -0.403712 | 0.059442 | -0.560673 | 0.004573 | -0.275023 | 0.133663 | 0.094469 | 0.185182 | 0.254938 | -0.274544 | 0.255335 | 0.049139 | -0.041904 |
| eigvec_10 | -0.052510 | -0.041140 | -0.034488 | -0.064026 | -0.014549 | -0.020847 | 0.223106 | -0.186675 | -0.298324 | 0.082029 | -0.136028 | 0.123452 | 0.088578 | -0.472045 | -0.423000 | -0.132286 | 0.590271 |
| eigvec_11 | 0.358970 | -0.543427 | 0.609651 | -0.144986 | 0.080348 | -0.414705 | 0.009018 | 0.050900 | 0.001146 | 0.000773 | -0.001114 | 0.013813 | 0.006209 | -0.002222 | -0.019187 | -0.035310 | -0.013071 |
| eigvec_12 | -0.459139 | 0.518569 | 0.404318 | 0.148739 | -0.051868 | -0.560363 | 0.052731 | -0.101595 | 0.025929 | -0.002883 | 0.012890 | -0.029808 | 0.027076 | 0.021248 | -0.003334 | 0.043880 | 0.005008 |
| eigvec_13 | 0.043046 | -0.058406 | -0.069399 | -0.008105 | -0.273128 | -0.081158 | 0.100693 | 0.143221 | -0.359322 | 0.031940 | -0.018578 | 0.040372 | -0.058973 | 0.445001 | -0.130728 | 0.692089 | 0.219839 |
| eigvec_14 | -0.133406 | 0.145498 | -0.029590 | -0.697723 | 0.617275 | -0.009916 | -0.020952 | -0.038354 | -0.003402 | 0.009439 | 0.003090 | 0.112056 | -0.158910 | 0.020899 | 0.008418 | 0.227742 | 0.003394 |
| eigvec_15 | 0.080633 | 0.033467 | -0.085697 | -0.107828 | 0.151742 | -0.056373 | 0.019286 | -0.034012 | -0.058429 | -0.066849 | 0.027529 | -0.691126 | 0.671009 | 0.041374 | -0.027154 | 0.073123 | 0.036477 |
| eigvec_16 | -0.595831 | -0.292642 | 0.444638 | -0.001023 | -0.021884 | 0.523622 | -0.125998 | 0.141856 | 0.069749 | -0.011438 | -0.039455 | -0.127696 | 0.058313 | 0.017715 | -0.104088 | 0.093746 | 0.069197 |
| eigvec_17 | 0.024071 | -0.145102 | 0.011143 | 0.038554 | -0.089352 | 0.056177 | -0.063536 | -0.823444 | 0.354560 | -0.028159 | -0.039264 | 0.023222 | 0.016485 | -0.011026 | 0.182661 | 0.325982 | 0.122107 |
# Fit PCA on the standardised features and keep the first 8 components.
from sklearn.decomposition import PCA

# NOTE(review): assumes X_std is the z-scored feature matrix built earlier — confirm.
pca = PCA(n_components=8)
X_pca = pca.fit_transform(X_std)
# Transposed view: one row per principal component, one column per observation.
X_pca.T
array([[-1.59285540e+00, -2.19240180e+00, -1.43096371e+00, ...,
-7.32560596e-01, 7.91932735e+00, -4.69508066e-01],
[ 7.67333510e-01, -5.78829984e-01, -1.09281889e+00, ...,
-7.72352397e-02, -2.06832886e+00, 3.66660943e-01],
[-1.01073537e-01, 2.27879812e+00, -4.38092811e-01, ...,
-4.05641899e-04, 2.07356368e+00, -1.32891515e+00],
...,
[-2.98306081e-01, -1.77137309e-01, -9.60591689e-01, ...,
4.68014248e-01, -2.06993738e+00, 8.39893087e-01],
[ 6.38443468e-01, 2.36753302e-01, -2.48276091e-01, ...,
-1.31749158e+00, 8.33276555e-02, 1.30731260e+00],
[-8.79386137e-01, 4.69253269e-02, 3.08740489e-01, ...,
-1.28288447e-01, -5.52585842e-01, 6.27409633e-01]])
# Loadings table: one row per retained component, one column per original feature.
# NOTE(review): df_s is presumably the standardised feature DataFrame — confirm.
data_c = pd.DataFrame(pca.components_, columns=[c for c in df_s])
data_c
| Apps | Accept | Enroll | Top10perc | Top25perc | F.Undergrad | P.Undergrad | Outstate | Room.Board | Books | Personal | PhD | Terminal | S.F.Ratio | perc.alumni | Expend | Grad.Rate | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.248766 | 0.207602 | 0.176304 | 0.354274 | 0.344001 | 0.154641 | 0.026443 | 0.294736 | 0.249030 | 0.064758 | -0.042529 | 0.318313 | 0.317056 | -0.176958 | 0.205082 | 0.318909 | 0.252316 |
| 1 | 0.331598 | 0.372117 | 0.403724 | -0.082412 | -0.044779 | 0.417674 | 0.315088 | -0.249644 | -0.137809 | 0.056342 | 0.219929 | 0.058311 | 0.046429 | 0.246665 | -0.246595 | -0.131690 | -0.169241 |
| 2 | -0.063092 | -0.101249 | -0.082986 | 0.035056 | -0.024148 | -0.061393 | 0.139682 | 0.046599 | 0.148967 | 0.677412 | 0.499721 | -0.127028 | -0.066038 | -0.289848 | -0.146989 | 0.226744 | -0.208065 |
| 3 | 0.281311 | 0.267817 | 0.161827 | -0.051547 | -0.109767 | 0.100412 | -0.158558 | 0.131291 | 0.184996 | 0.087089 | -0.230711 | -0.534725 | -0.519443 | -0.161189 | 0.017314 | 0.079273 | 0.269129 |
| 4 | 0.005741 | 0.055786 | -0.055694 | -0.395434 | -0.426534 | -0.043454 | 0.302385 | 0.222532 | 0.560919 | -0.127289 | -0.222311 | 0.140166 | 0.204720 | -0.079388 | -0.216297 | 0.075958 | -0.109268 |
| 5 | -0.016237 | 0.007535 | -0.042558 | -0.052693 | 0.033092 | -0.043454 | -0.191199 | -0.030000 | 0.162755 | 0.641055 | -0.331398 | 0.091256 | 0.154928 | 0.487046 | -0.047340 | -0.298119 | 0.216163 |
| 6 | -0.042486 | -0.012950 | -0.027693 | -0.161332 | -0.118486 | -0.025076 | 0.061042 | 0.108529 | 0.209744 | -0.149692 | 0.633790 | -0.001096 | -0.028477 | 0.219259 | 0.243321 | -0.226584 | 0.559944 |
| 7 | -0.103090 | -0.056271 | 0.058662 | -0.122678 | -0.102492 | 0.078890 | 0.570784 | 0.009846 | -0.221453 | 0.213293 | -0.232661 | -0.077040 | -0.012161 | -0.083605 | 0.678524 | -0.054159 | -0.005336 |
# Pairwise Pearson correlation between the loading columns of data_c,
# computed across the 8 component rows (not across the 777 observations).
correl = data_c.corr()
correl
| Apps | Accept | Enroll | Top10perc | Top25perc | F.Undergrad | P.Undergrad | Outstate | Room.Board | Books | Personal | PhD | Terminal | S.F.Ratio | perc.alumni | Expend | Grad.Rate | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Apps | 1.000000 | 0.976862 | 0.865244 | 0.389443 | 0.344998 | 0.788892 | -0.268088 | -0.108111 | -0.066128 | -0.328058 | -0.124258 | -0.084422 | -0.175779 | -0.034633 | -0.360383 | 0.244745 | 0.010360 |
| Accept | 0.976862 | 1.000000 | 0.900902 | 0.225521 | 0.210724 | 0.838960 | -0.164102 | -0.203893 | -0.104933 | -0.421813 | -0.174346 | -0.058894 | -0.149572 | 0.086361 | -0.351331 | 0.095023 | -0.010818 |
| Enroll | 0.865244 | 0.900902 | 1.000000 | 0.306888 | 0.323216 | 0.985887 | 0.130705 | -0.435656 | -0.490004 | -0.325686 | -0.055797 | -0.009496 | -0.099455 | 0.087549 | -0.104308 | 0.037302 | -0.133982 |
| Top10perc | 0.389443 | 0.225521 | 0.306888 | 1.000000 | 0.969762 | 0.259747 | -0.343682 | 0.189781 | -0.206397 | 0.299676 | 0.083594 | 0.197573 | 0.150896 | -0.241697 | 0.180224 | 0.492554 | 0.184446 |
| Top25perc | 0.344998 | 0.210724 | 0.323216 | 0.969762 | 1.000000 | 0.290447 | -0.347025 | 0.069418 | -0.288854 | 0.313321 | 0.068945 | 0.302558 | 0.250222 | -0.014050 | 0.224918 | 0.284028 | 0.259746 |
| F.Undergrad | 0.788892 | 0.838960 | 0.985887 | 0.259747 | 0.290447 | 1.000000 | 0.252337 | -0.517323 | -0.545002 | -0.299667 | 0.000028 | 0.074912 | -0.009071 | 0.119136 | -0.106266 | 0.000471 | -0.230232 |
| P.Undergrad | -0.268088 | -0.164102 | 0.130705 | -0.343682 | -0.347025 | 0.252337 | 1.000000 | -0.236924 | -0.404850 | -0.251802 | 0.056410 | 0.205443 | 0.236724 | -0.240205 | 0.335388 | 0.058259 | -0.561162 |
| Outstate | -0.108111 | -0.203893 | -0.435656 | 0.189781 | 0.069418 | -0.517323 | -0.236924 | 1.000000 | 0.720192 | -0.300793 | -0.155159 | 0.135027 | 0.142212 | -0.540703 | 0.198058 | 0.591716 | 0.379277 |
| Room.Board | -0.066128 | -0.104933 | -0.490004 | -0.206397 | -0.288854 | -0.545002 | -0.404850 | 0.720192 | 1.000000 | -0.247763 | -0.073650 | 0.196583 | 0.211281 | -0.167806 | -0.448561 | 0.292063 | 0.180194 |
| Books | -0.328058 | -0.421813 | -0.325686 | 0.299676 | 0.313321 | -0.299667 | -0.251802 | -0.300793 | -0.247763 | 1.000000 | -0.066285 | -0.108586 | -0.017300 | 0.057742 | -0.120554 | -0.006089 | -0.321986 |
| Personal | -0.124258 | -0.174346 | -0.055797 | 0.083594 | 0.068945 | 0.000028 | 0.056410 | -0.155159 | -0.073650 | -0.066285 | 1.000000 | 0.030626 | -0.049266 | -0.053862 | -0.123147 | 0.006721 | 0.094284 |
| PhD | -0.084422 | -0.058894 | -0.009496 | 0.197573 | 0.302558 | 0.074912 | 0.205443 | 0.135027 | 0.196583 | -0.108586 | 0.030626 | 1.000000 | 0.987833 | 0.263893 | -0.034915 | 0.027374 | -0.063158 |
| Terminal | -0.175779 | -0.149572 | -0.099455 | 0.150896 | 0.250222 | -0.009071 | 0.236724 | 0.142212 | 0.211281 | -0.017300 | -0.049266 | 0.987833 | 1.000000 | 0.234930 | -0.033977 | 0.040665 | -0.145634 |
| S.F.Ratio | -0.034633 | 0.086361 | 0.087549 | -0.241697 | -0.014050 | 0.119136 | -0.240205 | -0.540703 | -0.167806 | 0.057742 | -0.053862 | 0.263893 | 0.234930 | 1.000000 | -0.133006 | -0.903388 | 0.303828 |
| perc.alumni | -0.360383 | -0.351331 | -0.104308 | 0.180224 | 0.224918 | -0.106266 | 0.335388 | 0.198058 | -0.448561 | -0.120554 | -0.123147 | -0.034915 | -0.033977 | -0.133006 | 1.000000 | -0.059557 | 0.392132 |
| Expend | 0.244745 | 0.095023 | 0.037302 | 0.492554 | 0.284028 | 0.000471 | 0.058259 | 0.591716 | 0.292063 | -0.006089 | 0.006721 | 0.027374 | 0.040665 | -0.903388 | -0.059557 | 1.000000 | -0.301228 |
| Grad.Rate | 0.010360 | -0.010818 | -0.133982 | 0.184446 | 0.259746 | -0.230232 | -0.561162 | 0.379277 | 0.180194 | -0.321986 | 0.094284 | -0.063158 | -0.145634 | 0.303828 | 0.392132 | -0.301228 | 1.000000 |
# Eigenvalues: variance captured by each of the 8 retained components.
pca.explained_variance_
array([5.45052162, 4.48360686, 1.17466761, 1.00820573, 0.93423123,
0.84849117, 0.6057878 , 0.58787222])
# Loading vector (eigenvector) of the first principal component.
pca.components_[0]
array([ 0.2487656 , 0.2076015 , 0.17630359, 0.35427395, 0.34400128,
0.15464096, 0.0264425 , 0.29473642, 0.24903045, 0.06475752,
-0.04252854, 0.31831287, 0.31705602, -0.17695789, 0.20508237,
0.31890875, 0.25231565])
# Print PC1 as a linear combination of the original features.
# Fix: joining the terms with ' + ' removes the dangling '+' that the
# original per-term `print(..., end=' + ')` left after the last feature.
print('The Linear eq of 1st component: ')
equation = ' + '.join(
    '{} * {}'.format(np.round(pca.components_[0][i], 2), data_c.columns[i])
    for i in range(data_c.shape[1])
)
print(equation)
The Linear eq of 1st component: 0.25 * Apps + 0.21 * Accept + 0.18 * Enroll + 0.35 * Top10perc + 0.34 * Top25perc + 0.15 * F.Undergrad + 0.03 * P.Undergrad + 0.29 * Outstate + 0.25 * Room.Board + 0.06 * Books + -0.04 * Personal + 0.32 * PhD + 0.32 * Terminal + -0.18 * S.F.Ratio + 0.21 * perc.alumni + 0.32 * Expend + 0.25 * Grad.Rate +
# Percentage of total variance explained by each component (largest first),
# then the running (cumulative) total.
total_var = sum(eig_vals)
var_exp = [(ev / total_var) * 100 for ev in sorted(eig_vals, reverse=True)]
cum_var_exp = np.cumsum(var_exp)
cum_var_exp
array([ 32.0206282 , 58.36084263, 65.26175919, 71.18474841,
76.67315352, 81.65785448, 85.21672597, 88.67034731,
91.78758099, 94.16277251, 96.00419883, 97.30024023,
98.28599436, 99.13183669, 99.64896227, 99.86471628,
100. ])
# Scree plot: eigenvalue of each component, with the Kaiser criterion
# (eigenvalue = 1) drawn as a horizontal reference line.
fig = plt.figure(figsize=(8, 5))
# NOTE(review): assumes data_new has one column per original feature (17) — confirm.
sing_vals = range(data_new.shape[1])
plt.plot(sing_vals, eig_vals, 'ro-', linewidth=2)
# Fix: the original `plt.plot(range(0, 11), np.ones(11))` stopped at x = 10,
# so the eigenvalue-1 line did not span all 17 components; axhline always
# covers the full x-axis.
plt.axhline(y=1, linewidth=1)
plt.title('Scree Plot')
plt.xlabel('Principal Component')
plt.ylabel('Eigenvalue')
plt.grid()
plt.show()
# Summary statistics of the standardised data: every column should show
# mean ~0 (tiny floating-point residue) and std ~1 after z-scoring.
data_news.describe().T
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| Apps | 777.0 | 6.355797e-17 | 1.000644 | -0.755134 | -0.575441 | -0.373254 | 0.160912 | 11.658671 |
| Accept | 777.0 | 6.774575e-17 | 1.000644 | -0.794764 | -0.577581 | -0.371011 | 0.165417 | 9.924816 |
| Enroll | 777.0 | -5.249269e-17 | 1.000644 | -0.802273 | -0.579351 | -0.372584 | 0.131413 | 6.043678 |
| Top10perc | 777.0 | -2.753232e-17 | 1.000644 | -1.506526 | -0.712380 | -0.258583 | 0.422113 | 3.882319 |
| Top25perc | 777.0 | -1.546739e-16 | 1.000644 | -2.364419 | -0.747607 | -0.090777 | 0.667104 | 2.233391 |
| F.Undergrad | 777.0 | -1.661405e-16 | 1.000644 | -0.734617 | -0.558643 | -0.411138 | 0.062941 | 5.764674 |
| P.Undergrad | 777.0 | -3.029180e-17 | 1.000644 | -0.561502 | -0.499719 | -0.330144 | 0.073418 | 13.789921 |
| Outstate | 777.0 | 6.515595e-17 | 1.000644 | -2.014878 | -0.776203 | -0.112095 | 0.617927 | 2.800531 |
| Room.Board | 777.0 | 3.570717e-16 | 1.000644 | -2.351778 | -0.693917 | -0.143730 | 0.631824 | 3.436593 |
| Books | 777.0 | -2.192583e-16 | 1.000644 | -2.747779 | -0.481099 | -0.299280 | 0.306784 | 10.852297 |
| Personal | 777.0 | 4.765243e-17 | 1.000644 | -1.611860 | -0.725120 | -0.207855 | 0.531095 | 8.068387 |
| PhD | 777.0 | 5.954768e-17 | 1.000644 | -3.962596 | -0.653295 | 0.143389 | 0.756222 | 1.859323 |
| Terminal | 777.0 | -4.481615e-16 | 1.000644 | -3.785982 | -0.591502 | 0.156142 | 0.835818 | 1.379560 |
| S.F.Ratio | 777.0 | -2.057556e-17 | 1.000644 | -2.929799 | -0.654660 | -0.123794 | 0.609307 | 6.499390 |
| perc.alumni | 777.0 | -6.022638e-17 | 1.000644 | -1.836580 | -0.786824 | -0.140820 | 0.666685 | 3.331452 |
| Expend | 777.0 | 1.213101e-16 | 1.000644 | -1.240641 | -0.557483 | -0.245893 | 0.224174 | 8.924721 |
| Grad.Rate | 777.0 | 3.886495e-16 | 1.000644 | -3.230876 | -0.726019 | -0.026990 | 0.730293 | 3.060392 |
# Boxplots of the raw (pre-scaling) features on a wide canvas.
fig, ax = plt.subplots(figsize=(20, 8))
data_new.boxplot(ax=ax)
<AxesSubplot:>
# Boxplots of the standardised features, for comparison with the raw ones.
fig, ax = plt.subplots(figsize=(20, 8))
data_news.boxplot(ax=ax)
<AxesSubplot:>
from matplotlib.patches import Rectangle

# Heatmap of the component loadings; a red box outlines, for each original
# feature, the component on which it loads most heavily (largest |loading|).
fig, ax = plt.subplots(figsize=(22, 10), facecolor='w', edgecolor='k')
# Generalised: derive the PC labels from the actual number of component rows
# instead of hard-coding eight of them, so this survives a change of n_components.
pc_labels = ['PC{}'.format(i + 1) for i in range(data_c.shape[0])]
ax = sns.heatmap(data_c, annot=True, vmax=1.0, cmap='Blues', cbar=False,
                 yticklabels=pc_labels)
# idxmax over the absolute loadings gives, per feature column, the row label
# of its dominant component; get_loc turns that label into the cell's y-offset.
column_max = data_c.abs().idxmax(axis=0)
for col, variable in enumerate(data_c.columns):
    position = data_c.index.get_loc(column_max[variable])
    ax.add_patch(Rectangle((col, position), 1, 1, fill=False, edgecolor='red', lw=3))